Data Exploration

Hist plots

Creating New Variables

# Read the raw CPUE table through the project reader and order it by year.
eff <- arrange(read_csv_fn("data/raw-data/CPUE.csv", proj), year)

# Column-by-column overview of the loaded table.
str(eff)
## 'data.frame':    140728 obs. of  37 variables:
##  $ year         : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ stationkey   : num  4.25e+08 4.25e+08 4.25e+08 4.25e+08 4.25e+08 ...
##  $ vessel       : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ cruise       : int  247 247 247 247 247 247 247 247 247 247 ...
##  $ station      : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ startlat     : num  27.5 27.5 27.5 27.5 27.5 ...
##  $ startlon     : num  -97.2 -97.2 -97.2 -97.2 -97.2 ...
##  $ startdepth   : num  16.3 16.3 16.3 16.3 16.3 ...
##  $ start_gmt    : Factor w/ 2870 levels "10/11/2004 20:36",..: 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ area         : Factor w/ 1 level "Gulf of Mexico": 1 1 1 1 1 1 1 1 1 1 ...
##  $ llinegear    : Factor w/ 1 level "BL": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hooktype     : Factor w/ 1 level "C": 1 1 1 1 1 1 1 1 1 1 ...
##  $ llinehooksize: int  15 15 15 15 15 15 15 15 15 15 ...
##  $ numberhooks  : int  100 100 100 100 100 100 100 100 100 100 ...
##  $ hookbait     : Factor w/ 1 level "MACKEREL": 1 1 1 1 1 1 1 1 1 1 ...
##  $ EFFORT       : int  65 65 65 65 65 65 65 65 65 65 ...
##  $ Settling.rate: num  2.93 2.93 2.93 2.93 2.93 ...
##  $ Soak.Time    : num  62.1 62.1 62.1 62.1 62.1 ...
##  $ hookcount    : int  100 100 100 100 100 100 100 100 100 100 ...
##  $ fishcount    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ tempbotm     : num  28.8 28.8 28.8 28.8 28.8 ...
##  $ fluorobotm   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm     : num  70.5 70.5 70.5 70.5 70.5 ...
##  $ oxybotm      : num  5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 ...
##  $ salbotm      : num  36.6 36.6 36.6 36.6 36.6 ...
##  $ gom_rckv     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ gom_gvlv     : num  0.0352 0.0352 0.0352 0.0352 0.0352 ...
##  $ gom_sndv     : num  64.6 64.6 64.6 64.6 64.6 ...
##  $ gom_mudv     : num  35.4 35.4 35.4 35.4 35.4 ...
##  $ gom_folk     : int  32 32 32 32 32 32 32 32 32 32 ...
##  $ gom_domnc    : int  20 20 20 20 20 20 20 20 20 20 ...
##  $ bottype8     : Factor w/ 9 levels "","Gravel Dominant",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ bottype4     : Factor w/ 5 levels "","Gravel","Mud",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ TAXON        : Factor w/ 49 levels "C_ACR_4_FA","C_ACR_4_FJ",..: 25 1 2 3 4 5 6 7 8 9 ...
##  $ CNT          : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ CPUE         : num  0.923 0 0 0 0 ...
##  $ Adj.CPUE     : num  0.967 0 0 0 0 ...
headtail(eff)

Use startdepth instead of AVG.DEPTH: this data set has no end-depth column, so an average depth cannot be computed.

### Find STARTDEPTH and ENDDEPTH Variables

head(eff[,"startdepth"])
## [1] 16.27632 16.27632 16.27632 16.27632 16.27632 16.27632
### Average STARTDEPTH and ENDDEPTH and save as AVG.DEPTH

#eff$AVG.DEPTH <- apply(eff[,c(8,12)], 1, FUN=mean) 

#range(eff$AVG.DEPTH, na.rm = TRUE)

Month

head(eff$start_gmt)
## [1] 8/24/2001 2:35 8/24/2001 2:35 8/24/2001 2:35 8/24/2001 2:35
## [5] 8/24/2001 2:35 8/24/2001 2:35
## 2870 Levels: 10/11/2004 20:36 10/11/2004 23:49 ... 9/9/2017 7:25
str(eff$start_gmt)
##  Factor w/ 2870 levels "10/11/2004 20:36",..: 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
### Format START_GMT as Date

# Reduce the "m/d/Y H:M" stamps to their calendar date (the format stops after
# the year, so the time of day is dropped). strptime() returns a list-based
# POSIXlt object, which is awkward as a data-frame column, so the result is
# stored as POSIXct. The zone is pinned to UTC so the parsed date does not
# depend on the session's local zone (presumably correct for a GMT stamp, per
# the column name -- confirm).
eff$start_gmt <- as.POSIXct(
  strptime(as.character(eff$start_gmt), format = "%m/%d/%Y", tz = "UTC")
)

str(eff$start_gmt)
##  POSIXlt[1:140728], format: "2001-08-24" "2001-08-24" "2001-08-24" "2001-08-24" "2001-08-24" ...
# Abbreviated month name of each set. format() dispatches on the date-time
# class of start_gmt; the Date method is no longer called directly on an
# object that is not a Date. Note that "%b" is locale dependent.
eff$month <- format(eff$start_gmt, format = "%b")

str(eff$month)
##  chr [1:140728] "Aug" "Aug" "Aug" "Aug" "Aug" "Aug" "Aug" "Aug" "Aug" ...
unique(eff$month)
## [1] "Aug" "Jul" "Sep" "Oct" "Nov"

Distance to shore

# Spatial and parallel back-ends. library() stops at once when a package is
# missing, whereas require() only returns FALSE and lets the script fail later.
# NOTE(review): rgdal and rgeos have been retired from CRAN; consider porting
# this section to sf (st_read / st_transform / st_distance).
library(rgdal)    # for readOGR(...); loads package sp as well
library(rgeos)    # for gDistance(...)

library(parallel) # for detectCores()
library(foreach)  # for foreach(...)
library(snow)     # for makeCluster(...)
library(doSNOW)   # for registerDoSNOW(...)

# Copies of the start coordinates under the names used for the point layer.
eff$Long <- eff$startlon
eff$Lat <- eff$startlat

head(eff)
str(eff)

# Coordinates are declared as WGS84 lon/lat and re-projected to a metric
# (Mollweide, units = m) system before distances are measured.
# NOTE(review): Mollweide is an equal-area projection and does not preserve
# distances; confirm the resulting error is acceptable for this study area.
wgs.84    <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
mollweide <- "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
sp.points <- SpatialPoints(eff[, c("Long", "Lat")], proj4string = CRS(wgs.84))

path <- "shape-file/ne_10m_coastline/ne_10m_coastline.shp"

coast <- rgdal::readOGR(dsn = path, layer = "ne_10m_coastline", p4s = wgs.84)

coast.moll <- spTransform(coast, CRS(mollweide))

point.moll <- spTransform(sp.points, CRS(mollweide))

# One worker per detected core. detectCores() may return NA, in which case a
# single worker is used instead of failing inside makeCluster().
no_cores <- detectCores()
if (is.na(no_cores) || no_cores < 1) {
  no_cores <- 1
}

cl <- makeCluster(no_cores, type = "SOCK") # socket cluster with no_cores workers
registerDoSNOW(cl)                         # register the cluster with foreach

# Distance from each of the first n points to the coastline, in projection
# units (metres), returned in input order. seq_len() keeps n = 0 from
# iterating over c(1, 0).
get.dist.parallel <- function(n) {
  foreach(i = seq_len(n), .combine = c, .packages = "rgeos", .inorder = TRUE,
          .export = c("point.moll", "coast.moll")) %dopar%
    gDistance(point.moll[i], coast.moll)
}

# The cluster is released even when the distance computation fails.
eff$Dis.to.SHORE <- tryCatch(
  get.dist.parallel(length(sp.points)),
  finally = stopCluster(cl)
)

# Metres to kilometres.
eff$Dis.to.SHORE <- eff$Dis.to.SHORE / 1000

head(eff)
tail(eff)
str(eff)
## Warning: use rgdal::readOGR or sf::st_read

Create Species, Sex, Maturity, and common name variables

###write.csv(eff,file = "eff.csv", row.names = FALSE)
#eff <- read.csv("eff.csv") ### bring in eff.csv

str(eff)

### Reduced data frame: only the columns carried forward from eff.
red.df <- select(
  eff,
  year, month, startlat, startlon, start_gmt,
  startdepth, tempbotm, fluorobotm, turbbotm,
  oxybotm, salbotm, bottype4, Dis.to.SHORE, TAXON, CPUE, Adj.CPUE
)

### Quick look at the result.
str(red.df)
tail(red.df)


### TAXON codes look like "C_ACR_4_FA": a species code, the separator "_4_",
### then one letter for sex and one letter for maturity.
### Make TAXON character so it can be supplied to strsplit().
red.df$TAXON <- as.character(red.df$TAXON)

### Split every code at "_4_": element 1 is the species, element 2 holds the
### sex and maturity letters.
split_TAXON <- strsplit(red.df$TAXON, "_4_")

### Check everything is good
split_TAXON[[1]][2]
red.df$TAXON[1]

split_TAXON[[5]][2]
red.df$TAXON[5]

split_TAXON[[1]][1]

str(split_TAXON)

### Take one piece from every split code in a single pass. vapply() always
### returns a character vector (a missing piece becomes NA) and also copes with
### an empty table, which the previous 1:length() loops did not.
species <- vapply(split_TAXON, function(parts) parts[1], character(1))
split_TAXON_vec <- vapply(split_TAXON, function(parts) parts[2], character(1))

### Separate sex (first letter) and maturity (second letter). A letter that is
### absent is recorded as NA, as it was when the letters were split one by one.
sex <- substr(split_TAXON_vec, 1, 1)
maturity <- substr(split_TAXON_vec, 2, 2)
sex[!nzchar(sex)] <- NA
maturity[!nzchar(maturity)] <- NA

### check everything is good
head(sex)
head(maturity)
head(red.df$TAXON)

### add species, sex, and maturity variables to red.df
red.df$species <- species
red.df$sex <- sex
red.df$maturity <- maturity

### check
str(red.df)
tail(red.df) ### good


### rename species with common names

unique(red.df$species) %>% sort() ### interesting only 7 species
### No silky this makes sense

### Species code -> common name. Looking the codes up in a named vector
### replaces a row-by-row if() chain over the whole table. A code that is not
### listed (or is NA) now yields NA rather than a leftover "0" or an error.
common_names <- c(
  C_ACR = "Blacknose",
  C_BRE = "Spinner",
  C_LEU = "Bull",
  C_LIM = "Blacktip",
  C_PLU = "Sandbar",
  G_CUV = "Tiger",
  R_TER = "Sharpnose"
)

red.df$common <- unname(common_names[as.character(red.df$species)])

### check
str(red.df)
tail(red.df)

Create Presence Absence Variable

### Add presence absence variable: 1 when CPUE > 0, 0 when CPUE == 0, and a
### real missing value for anything else (negative or NA CPUE). The previous
### loop stopped with an error on an NA CPUE and otherwise wrote the string
### "NA", which silently turned the whole column into character.
red.df$pres <- as.numeric(
  ifelse(red.df$CPUE > 0, 1,
         ifelse(red.df$CPUE == 0, 0, NA_real_))
)

### check
str(red.df)
tail(red.df)

Saving changes to CPUE2.csv

#8-15-18#write_csv_fn( red.df, file = "data/raw-data/CPUE2.csv", row.names = FALSE, proj)

QA/QC

1. Load the CPUE2.csv data, extract the records for each species, and save them in a list called sp.lst

# Reload the table saved at the end of the variable-creation step.
eff2 <- read_csv_fn("data/raw-data/CPUE2.csv", proj)

# Column-by-column overview of the reloaded table.
str(eff2)
## 'data.frame':    140728 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 27.5 27.5 27.5 27.5 ...
##  $ startlon    : num  -97.2 -97.2 -97.2 -97.2 -97.2 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 22 22 22 22 22 22 22 22 ...
##  $ startdepth  : num  16.3 16.3 16.3 16.3 16.3 ...
##  $ tempbotm    : num  28.8 28.8 28.8 28.8 28.8 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 70.5 70.5 70.5 70.5 ...
##  $ oxybotm     : num  5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 ...
##  $ salbotm     : num  36.6 36.6 36.6 36.6 36.6 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ Dis.to.SHORE: num  4.16 4.16 4.16 4.16 4.16 ...
##  $ TAXON       : Factor w/ 49 levels "C_ACR_4_FA","C_ACR_4_FJ",..: 25 1 2 3 4 5 6 7 8 9 ...
##  $ CPUE        : num  0.923 0 0 0 0 ...
##  $ Adj.CPUE    : num  0.967 0 0 0 0 ...
##  $ species     : Factor w/ 7 levels "C_ACR","C_BRE",..: 4 1 1 1 1 1 1 1 2 2 ...
##  $ sex         : Factor w/ 3 levels "F","M","U": 2 1 1 1 2 2 2 3 1 1 ...
##  $ maturity    : Factor w/ 3 levels "A","J","U": 1 1 2 3 1 2 3 3 1 2 ...
##  $ common      : Factor w/ 7 levels "Blacknose","Blacktip",..: 2 1 1 1 1 1 1 1 6 6 ...
##  $ pres        : int  1 0 0 0 0 0 0 0 0 0 ...
headtail(eff2)
# Species present in the data. factor() makes this work whether `common` was
# read as a factor or as plain character (levels() of a character vector is
# NULL, which would leave every subset empty).
common_fac <- factor(eff2$common)
Species_Vec <- levels(common_fac)

# One data frame per species, named by the lower-cased common name
# (blacknose, blacktip, ...). The names are taken from the data rather than
# typed in against level positions, so a name can never be attached to the
# wrong species. Rows with a missing common name are dropped, as before.
sp.lst <- split(eff2, common_fac)
names(sp.lst) <- tolower(names(sp.lst))

str(sp.lst[[1]])
## 'data.frame':    20104 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 27.5 27.5 27.5 27.5 ...
##  $ startlon    : num  -97.2 -97.2 -97.2 -97.2 -97.2 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 22 22 22 22 22 22 22 22 ...
##  $ startdepth  : num  16.3 16.3 16.3 16.3 16.3 ...
##  $ tempbotm    : num  28.8 28.8 28.8 28.8 28.8 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 70.5 70.5 70.5 70.5 ...
##  $ oxybotm     : num  5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 5.6 ...
##  $ salbotm     : num  36.6 36.6 36.6 36.6 36.6 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 5 5 5 5 5 5 3 3 3 ...
##  $ Dis.to.SHORE: num  4.16 4.16 4.16 4.16 4.16 ...
##  $ TAXON       : Factor w/ 49 levels "C_ACR_4_FA","C_ACR_4_FJ",..: 1 2 3 4 5 6 7 1 2 3 ...
##  $ CPUE        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Adj.CPUE    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ species     : Factor w/ 7 levels "C_ACR","C_BRE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 3 levels "F","M","U": 1 1 1 2 2 2 3 1 1 1 ...
##  $ maturity    : Factor w/ 3 levels "A","J","U": 1 2 3 1 2 3 3 1 2 3 ...
##  $ common      : Factor w/ 7 levels "Blacknose","Blacktip",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 0 0 0 0 0 0 0 0 ...
head(sp.lst[[5]])

2. Plot CPUE for all Species vs all Explanatory variables

Lat Lon

## Loading required package: rgdal
## Warning: package 'rgdal' was built under R version 3.5.1
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.5.1
## rgdal: version: 1.3-3, (SVN revision 759)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/W10039893/Documents/R/win-library/3.5/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/W10039893/Documents/R/win-library/3.5/rgdal/proj
##  Linking to sp version: 1.3-1
## Loading required package: rgeos
## Warning: package 'rgeos' was built under R version 3.5.1
## rgeos version: 0.3-28, (SVN revision 572)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 r0 
##  Linking to sp version: 1.3-1 
##  Polygon checking: TRUE
## Warning: use rgdal::readOGR or sf::st_read

Temp

Fluorobotm

Turbidity

Oxybotm

Salinity

Distance to shore

Blacknose

No startdepth over 100. salbotm greater than 40 looks like 1 outlier at 0.

Blacktip

Bull

Dis.to.SHORE

Sandbar

Sharpnose

Spinner

Juveniles may occupy larger temperature range than adults

Tiger

Not enough data in Adult to make separate maturity. Seems to be a lack of unique values CPUE is clustered around whole numbers.

3. Identify outliers

# Variables screened for outliers, listed by name. These are the columns that
# positions c(1, 3, 4, 6:13, 15) pointed at; naming them keeps the selection
# correct if the column order changes and removes the dependency on sp.lst.
var_vec <- c("year", "startlat", "startlon", "startdepth", "tempbotm",
             "fluorobotm", "turbbotm", "oxybotm", "salbotm", "bottype4",
             "Dis.to.SHORE", "CPUE")

# Base subsetting by name: passing a bare external character vector to
# select() is ambiguous and deprecated in current tidyselect.
summary(eff2[, var_vec])
##       year         startlat        startlon        startdepth    
##  Min.   :2001   Min.   :24.98   Min.   :-97.30   Min.   :  9.00  
##  1st Qu.:2003   1st Qu.:27.33   1st Qu.:-93.22   1st Qu.: 28.48  
##  Median :2008   Median :28.33   Median :-87.72   Median : 54.00  
##  Mean   :2008   Mean   :28.10   Mean   :-88.71   Mean   : 79.75  
##  3rd Qu.:2013   3rd Qu.:29.06   3rd Qu.:-84.24   3rd Qu.: 99.38  
##  Max.   :2017   Max.   :30.35   Max.   :-81.59   Max.   :375.00  
##                                                                  
##     tempbotm       fluorobotm       turbbotm         oxybotm      
##  Min.   : 8.61   Min.   : 0.00   Min.   :  0.00   Min.   : 0.000  
##  1st Qu.:18.77   1st Qu.: 0.36   1st Qu.: 63.92   1st Qu.: 3.704  
##  Median :22.81   Median : 1.28   Median : 78.34   Median : 4.400  
##  Mean   :22.65   Mean   : 2.31   Mean   : 74.12   Mean   : 4.496  
##  3rd Qu.:27.87   3rd Qu.: 2.93   3rd Qu.: 91.12   3rd Qu.: 5.433  
##  Max.   :31.86   Max.   :62.82   Max.   :122.36   Max.   :10.702  
##  NA's   :5929    NA's   :50813   NA's   :9114     NA's   :6125    
##     salbotm        bottype4      Dis.to.SHORE          CPUE        
##  Min.   :17.87         :  931   Min.   :  2.159   Min.   : 0.0000  
##  1st Qu.:35.63   Gravel:14700   1st Qu.: 44.643   1st Qu.: 0.0000  
##  Median :36.18   Mud   :64484   Median : 78.234   Median : 0.0000  
##  Mean   :35.69   Rock  : 6076   Mean   : 88.820   Mean   : 0.1429  
##  3rd Qu.:36.41   Sand  :54537   3rd Qu.:128.262   3rd Qu.: 0.0000  
##  Max.   :48.79                  Max.   :236.487   Max.   :70.0000  
##  NA's   :5978
# Observed range of each continuous variable, ignoring NAs. The `##` lines are
# the recorded console output; trailing comments give the rounded values.
range(eff2$tempbotm, na.rm=TRUE) # 8.6 to 31.9 C
## [1]  8.6102 31.8547
range(eff2$startdepth, na.rm=TRUE)  # 9 to 375
## [1]   9 375
range(eff2$fluorobotm, na.rm=TRUE) # 0 to 62.8
## [1]  0.0000 62.8207
range(eff2$turbbotm, na.rm=TRUE) # 0 to 122.4
## [1]   0.0000 122.3585
range(eff2$oxybotm, na.rm=TRUE) # 0 to 10.7
## [1]  0.00000 10.70234
range(eff2$salbotm, na.rm=TRUE) # 17.9 to 48.8
## [1] 17.8695 48.7922
range(eff2$Dis.to.SHORE, na.rm=TRUE) # 2.16 to 236.5 km
## [1]   2.15896 236.48727

Lat Lon QA/QC

All looks good here.

Startdepth QA/QC

# Span of start depths over all records (9 to 375).
range(eff2$startdepth, na.rm = TRUE)

# CPUE against start depth, every record.
plot(CPUE ~ startdepth, data = eff2)

I don’t think there are any outliers here. There is very little positive catch beyond about 160 m; however, the zero catches at those depths should not be removed. There are no NAs in startdepth.

Temperature QA/QC

Temperature: reasonable temperatures range from 50 F (10 C) in winter to 88 F (31 C) in summer [https://www.nodc.noaa.gov/dsdt/cwtg/all_meanT.html]

unique(eff2$month) # July to November (unique() has no na.rm argument)
range(eff2$tempbotm, na.rm = TRUE) # 8.6 to 31.9 C
# Range looks pretty reasonable

# Rows flagged for removal: bottom temperature below 9 C. which() never
# returns rows where the temperature is NA.
rm.temp <- which(eff2$tempbotm < 9)

# Rows that stay. Built with setdiff() because eff2[-rm.temp, ] selects ZERO
# rows when rm.temp is empty (a negative subscript of length 0).
keep.temp <- setdiff(seq_len(nrow(eff2)), rm.temp)

par(mfrow = c(1, 1))
# NOTE(review): ylim stops at 65 here while the other QA plots use 70 and the
# maximum CPUE is 70 -- confirm that the clipping is intended.
plot(CPUE ~ tempbotm,
     data = eff2[keep.temp, ],
     xlim = c(8, 33), ylim = c(0, 65),
     pch = 19,
     col = rgb(0, 0, 0, 0.15),
     main = "",
     bty = "n")

abline(v = 9, lty = 3, col = "green")

# Flagged rows in red, added to the existing axes so both layers are
# guaranteed to share one coordinate system.
points(eff2$tempbotm[rm.temp], eff2$CPUE[rm.temp],
       pch = 19,
       col = rgb(1, 0, 0, 0.15))

# Each station contributes 49 rows (one per TAXON level), hence the division.
title(main = paste0("n = ", length(rm.temp) / 49, " Station(s) ",
                    "(or ", length(rm.temp), " lines of data)"))

I will remove temperatures below \(9\,^{\circ}\mathrm{C}\). This comes out to be \(n = 3\) station(s). NAs in temperature account for 4.21 \(\%\) of the data.

Fluorobotm QA/QC

Fluorobotm: I do not know what a reasonable range is for this variable.

range(eff2$fluorobotm, na.rm = TRUE) # 0 to 62.8

# Rows flagged as suspicious: fluorescence exactly 0 or above 26.5.
rm.fluo <- which(eff2$fluorobotm == 0 | eff2$fluorobotm > 26.5)

# Rows that stay. Built with setdiff() because eff2[-rm.fluo, ] selects ZERO
# rows when rm.fluo is empty (a negative subscript of length 0).
keep.fluo <- setdiff(seq_len(nrow(eff2)), rm.fluo)

par(mfrow = c(1, 1))
plot(CPUE ~ fluorobotm,
     data = eff2[keep.fluo, ],
     xlim = c(0, 63), ylim = c(0, 70),
     pch = 19,
     col = rgb(0, 0, 0, 0.15),
     main = "",
     bty = "n")

abline(v = 0, lty = 3, col = "green")

abline(v = 13, lty = 2, col = "red")

abline(v = 20, lty = 2, col = "red")

abline(v = 26.5, lty = 3, col = "green")

# Flagged rows in red, added to the existing axes. The overlay used to be a
# second plot() with xlim = c(8, 33) and ylim = c(0, 65) (limits copied from
# the temperature plot), so the red points were drawn on a different scale
# from the axes shown and did not line up with the cut-off lines.
points(eff2$fluorobotm[rm.fluo], eff2$CPUE[rm.fluo],
       pch = 19,
       col = rgb(1, 0, 0, 0.15))

# Each station contributes 49 rows (one per TAXON level), hence the division.
title(main = paste0("n = ", length(rm.fluo) / 49, " Station(s) ",
                    "(or ", length(rm.fluo), " lines of Data)"))

# Rows with no fluorescence reading, and their share of the data set.
(fluo.NA <- which(is.na(eff2$fluorobotm)))

length(fluo.NA)

(fluo.NA.percent.data <- length(fluo.NA) / nrow(eff2) * 100)

# Missing readings per survey year.
table(eff2$year[fluo.NA])

hist(eff2$year[fluo.NA],
     xaxt = "n",
     labels = TRUE,
     ylab = "number of NAs",
     xlab = "Year",
     main = paste0("fluorobotm NAs represent ",
                   round(length(fluo.NA) / nrow(eff2) * 100, 2),
                   " % of the Data"))

axis(1, at = seq(2001, 2017, 1))

## n divided by 49 because of 49 levels of data. 

I will not remove any outliers from this because the NAs make up 36.11 \(\%\) of the data frame. I will not be using this as an explanatory variable.

Turbbotm QA/QC

Turbbotm

 ### FLUOROBOTM is not used as an explanatory variable, so its outliers are left in place; TURBBOTM is kept as an explanatory variable, so extreme turbidity values are screened below

# First look: every record, with the candidate cut-offs marked.
plot(CPUE ~ turbbotm,
     xlim = c(0, 125), ylim = c(0, 70),
     data = eff2)

abline(v = 0, lty = 2, col = "red")
abline(v = 100, lty = 2, col = "red")
abline(v = 104, lty = 2, col = "red")

range(eff2$turbbotm, na.rm = TRUE) # 0.0000 122.3585

# Cut-offs that were tried, with the number of stations each would remove:
#(rm.Turb <- which(eff2$turbbotm > 100 | eff2$turbbotm <= 0)) # 27 individuals and 1323 lines
#(rm.Turb <- which(eff2$turbbotm > 100 )) # 19 individuals
#(rm.Turb <- which(eff2$turbbotm > 104 )) # 5 individuals
rm.Turb <- which(eff2$turbbotm > 110) # 1 individual

# Each station contributes 49 rows (one per TAXON level), hence the division.
(n.rm.turb <- length(rm.Turb) / 49) ### 1 individual

# Rows that stay. Built with setdiff() because eff2[-rm.Turb, ] selects ZERO
# rows when rm.Turb is empty (a negative subscript of length 0).
keep.Turb <- setdiff(seq_len(nrow(eff2)), rm.Turb)

plot(CPUE ~ turbbotm,
     xlim = c(0, 125), ylim = c(0, 70),
     data = eff2[keep.Turb, ],
     pch = 19,
     col = rgb(0, 0, 0, 0.25))

#abline(v = 0, lty = 3, col = "green")
abline(v = 100, lty = 2, col = "red")

abline(v = 104, lty = 2, col = "red")
abline(v = 110, lty = 3, col = "green")

# Flagged rows in red, added to the existing axes so both layers are
# guaranteed to share one coordinate system.
points(eff2$turbbotm[rm.Turb], eff2$CPUE[rm.Turb],
       pch = 19,
       col = rgb(1, 0, 0, 0.25))

title(main = paste0("n = ", length(rm.Turb) / 49, " Station(s) ",
                    "(or ", length(rm.Turb), " lines of Data)"))

NAs in Turbbotm represent 6.48 \(\%\) of the data. This seems like a lot, but I will still include turbidity as an explanatory variable. I am only removing turbidity values greater than 110, which results in the removal of 1 station (removing values greater than 104 would remove 5 stations, values greater than 100 would remove 19 stations, and values greater than 100 or equal to 0 would remove 27 stations).

Oxybotm QA/QC

Oxybotm Hypoxia near Louisiana coast where DO is less than 2 ppm [https://toxics.usgs.gov/hypoxia/hypoxic_zone.html], Ranges 0 to 8 (Summer) [https://www.ncddc.noaa.gov/hypoxia/products/2010/]

#eff2[c(rm.Sal),]

### oxybotm

range(eff2$oxybotm, na.rm = TRUE) # 0 to 10.7

# Rows highlighted for inspection: bottom oxygen exactly 0.
rm.oxy <- which(eff2$oxybotm == 0)

# Remaining rows. Built with setdiff() because eff2[-rm.oxy, ] selects ZERO
# rows when rm.oxy is empty (a negative subscript of length 0).
keep.oxy <- setdiff(seq_len(nrow(eff2)), rm.oxy)

#eff2[c(oxy),]

plot(CPUE ~ oxybotm,
     data = eff2[keep.oxy, ],
     xlim = c(0, 11), ylim = c(0, 70),
     pch = 19,
     col = rgb(0, 0, 0, 0.15),
     bty = "n")

# Highlighted rows in red, added to the existing axes so both layers are
# guaranteed to share one coordinate system.
points(eff2$oxybotm[rm.oxy], eff2$CPUE[rm.oxy],
       pch = 19,
       col = rgb(1, 0, 0, 0.25))

# Each station contributes 49 rows (one per TAXON level), hence the division.
title(main = paste0("n = ", length(rm.oxy) / 49, " Station(s) ",
                    "(or ", length(rm.oxy), " lines of Data)"))

#abline(v = 9, col = "red", lty = 2)
#abline(v = 10.5, col = "red", lty = 2)


#(rm.Oxy <- which(eff2$oxybotm > 12 | eff2$oxybotm == 0)) ### 99 100 338 339 411 625 652 823 824 

I’m not going to remove any oxybotm values; they seem fairly reasonable even though some are from hypoxic waters. NAs in oxybotm represent 4.35 \(\%\) of the data.

Salinity QA/QC

Salinity Ranges between 32 to 36.5 (WINTER) and 31 to 36.5 (SUMMER) on average in the northern GoM. [https://gulfatlas.noaa.gov/catalog/products/physical/seawater-salinity/index.html]

range(eff2$salbotm, na.rm = TRUE) # 17.9 to 48.8

      ## was less than 15
#(rm.Sal <- which(eff2$salbotm > 40 | eff2$salbotm < 20)) ### 507  932 2705 2770 2771 2941 2979
# Rows flagged for removal: bottom salinity above 38 or below 23.
rm.Sal <- which(eff2$salbotm > 38 | eff2$salbotm < 23)

# Rows that stay. Built with setdiff() because eff2[-rm.Sal, ] selects ZERO
# rows when rm.Sal is empty (a negative subscript of length 0).
keep.Sal <- setdiff(seq_len(nrow(eff2)), rm.Sal)

par(mfrow = c(1, 1))
plot(CPUE ~ salbotm,
     data = eff2[keep.Sal, ],
     xlim = c(15, 50), ylim = c(0, 70),
     pch = 19,
     col = rgb(0, 0, 0, 0.15),
     main = "",
     bty = "n")

# Flagged rows in red, jittered horizontally and added to the existing axes so
# both layers are guaranteed to share one coordinate system. Skipped when
# nothing is flagged, so jitter() is never handed an empty vector.
if (length(rm.Sal) > 0) {
  points(jitter(eff2$salbotm[rm.Sal], amount = 0.5), eff2$CPUE[rm.Sal],
         pch = 19,
         col = rgb(1, 0, 0, 0.15))
}

# Each station contributes 49 rows (one per TAXON level), hence the division.
title(main = paste0("n = ", length(rm.Sal) / 49, " Station(s) ",
                    "(or ", length(rm.Sal), " lines of Data)"))

# Red dashed: the earlier 20 / 40 cut-offs. Green dotted: the 23 / 38 cut-offs
# used for rm.Sal.
abline(v = 40, col = "red", lty = 2)
abline(v = 20, col = "red", lty = 2)

abline(v = 38, col = "green", lty = 3)
abline(v = 23, col = "green", lty = 3)

#text(x= eff2$salbotm[c(rm.Sal)], y = eff2$CPUE[c(rm.Sal)] ,
#      labels = as.character(round(eff2$salbotm[c(rm.Sal)],1)),
#     pos = 3)

I will remove salinity below 23 and above 38, which will result in the removal of 5 stations. NAs in salinity account for 4.25 \(\%\) of the data.

4. remove outliers and create individual data frames for each species save as .csv file

#eff2[c(rm.Turb),]

#(rm.Fluo <-which(eff2$FLUOROBOTM > 30)) ### 1536, 1802, 2230, 2660, 2704

# Rows flagged by the temperature, turbidity and salinity screens, summed per
# screen. A row caught by two screens is counted twice here; compare with
# length(rm1) below, which counts each row once.
(n.lines.rm <- length(rm.temp) + length(rm.Turb) + length(rm.Sal))
## [1] 441
# Each station contributes 49 rows (one per TAXON level), hence the division.
(n.rm <- n.lines.rm / 49) ### individuals
## [1] 9
  ### round 1 ###rm.EndLon
# Every flagged row exactly once, in row order.
rm1 <- sort(unique(c(rm.temp, rm.Turb, rm.Sal)))

length(rm1)

nrow(eff2)

# Keep the rows that are not flagged. setdiff() is used instead of
# eff2[-rm1, ] because a negative subscript of length 0 selects ZERO rows, so
# an empty rm1 would have wiped the whole table.
eff2 <- eff2[setdiff(seq_len(nrow(eff2)), rm1), ]
######

nrow(eff2)

str(eff2)

#8-15-18#write_csv_fn( eff2, file = "data/raw-data/CPUE3.csv", row.names = FALSE, proj)

5. Load newly created .csv files and save to sp.df list

# Load the cleaned data set from disk via the project helper read_csv_fn.
# NOTE(review): the write_csv_fn call for this same path is commented out in the
# previous step, so this presumably reads a copy saved on an earlier run -- confirm.
eff3 <- read_csv_fn( "data/raw-data/CPUE3.csv", proj)
#' Split catch data into one data frame per sex and species
#'
#' Species are matched by position against the levels of `data$common`
#' (or, if `common` is not a factor, its sorted unique values): level 1 is
#' returned as `blacknose`, 2 as `blacktip`, 3 as `bull`, 4 as `sandbar`,
#' 5 as `sharpnose`, 6 as `spinner` and 7 as `tiger`.
#' NOTE(review): this assumes the levels sort into exactly that order -- confirm
#' against the input file. A position with no level yields a zero-row data frame.
#'
#' Implemented in base R: rows whose filter condition is `NA` are dropped,
#' unused factor levels are dropped, and row names are reset. This is intended
#' to match `FSA::filterD()`, which the previous version used and which recent
#' FSA releases no longer provide.
#'
#' @param data Data frame with columns `common` and `sex`, plus `pres` when
#'   `strata = 2`. Required: the former default `data = data` referred to
#'   itself and could never be evaluated.
#' @param strata `1` keeps every row of each sex/species combination;
#'   `2` keeps only rows with `pres == 1`.
#' @return A list with elements `Female` (rows with `sex == "F"`) and `Male`
#'   (rows with `sex == "M"`), each a named list of seven data frames.
load_data <- function(data, strata = 1) {
  if (length(strata) != 1 || !(strata %in% c(1, 2))) {
    stop("Strata must equal either 1 or 2 !!!")
  }

  needed <- c("common", "sex", if (strata == 2) "pres")
  absent <- setdiff(needed, names(data))
  if (length(absent) > 0) {
    stop("`data` is missing required column(s): ",
         paste(absent, collapse = ", "))
  }

  # levels() is NULL for a character column, so fall back to the sorted unique
  # values, which is the order factor() would assign.
  Species_Vec <- if (is.factor(data$common)) {
    levels(data$common)
  } else {
    sort(unique(as.character(data$common)))
  }

  species_names <- c("blacknose", "blacktip", "bull", "sandbar",
                     "sharpnose", "spinner", "tiger")

  common_chr <- as.character(data$common)
  sex_chr <- as.character(data$sex)
  in_strata <- if (strata == 2) data$pres == 1 else rep(TRUE, nrow(data))

  # One sex/species subset. which() discards NA comparisons, including the case
  # where `species_level` itself is NA because fewer than seven levels exist.
  subset_one <- function(species_level, sex_code) {
    rows <- which(in_strata & common_chr == species_level & sex_chr == sex_code)
    out <- droplevels(data[rows, , drop = FALSE])
    rownames(out) <- NULL
    out
  }

  # All seven species for one sex, named by species.
  by_sex <- function(sex_code) {
    stats::setNames(
      lapply(seq_along(species_names),
             function(i) subset_one(Species_Vec[i], sex_code)),
      species_names
    )
  }

  list(Female = by_sex("F"),
       Male = by_sex("M"))
}
# Strata 1: every row of each sex/species combination.
test_strata1 <- load_data(eff3, strata = 1)

str(test_strata1[["Female"]][["blacknose"]])
## 'data.frame':    8589 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 27.5 27.5 26.7 26.7 ...
##  $ startlon    : num  -97.2 -97.2 -97.2 -97.2 -97.2 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 22 22 22 22 21 21 21 23 ...
##  $ startdepth  : num  16.3 16.3 16.3 26.9 26.9 ...
##  $ tempbotm    : num  28.8 28.8 28.8 23.8 23.8 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 70.5 70.5 58.6 58.6 ...
##  $ oxybotm     : num  5.6 5.6 5.6 5.6 5.6 5.6 6 6 6 5.9 ...
##  $ salbotm     : num  36.6 36.6 36.6 36.5 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 5 5 3 3 3 3 3 3 5 ...
##  $ Dis.to.SHORE: num  4.16 4.16 4.16 16.12 16.12 ...
##  $ TAXON       : Factor w/ 3 levels "C_ACR_4_FA","C_ACR_4_FJ",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ CPUE        : num  0 0 0 0 0 ...
##  $ Adj.CPUE    : num  0 0 0 0 0 ...
##  $ species     : Factor w/ 1 level "C_ACR": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "F": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 3 levels "A","J","U": 1 2 3 1 2 3 1 2 3 1 ...
##  $ common      : Factor w/ 1 level "Blacknose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 0 0 0 0 1 0 0 0 ...
# Strata 2: only rows with pres == 1.
test_strata2 <- load_data(eff3, strata = 2)

str(test_strata2[["Female"]][["blacknose"]])
## 'data.frame':    525 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 27.4 28.3 28.4 28.4 ...
##  $ startlon    : num  -97 -96.8 -95.6 -95.4 -95.4 ...
##  $ start_gmt   : Factor w/ 283 levels "2001-08-02","2001-08-06",..: 11 11 9 10 10 9 6 10 10 6 ...
##  $ startdepth  : num  27.6 43.5 28.5 29.4 29.4 ...
##  $ tempbotm    : num  27.6 26 22.4 26 26 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  74.8 75.2 NA 70.4 70.4 ...
##  $ oxybotm     : num  6 6.2 5.1 5.5 5.5 5 5.1 4.4 4.4 5.4 ...
##  $ salbotm     : num  36.6 36.6 35.3 36.3 36.3 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Dis.to.SHORE: num  16.6 32.2 35.9 36.6 36.6 ...
##  $ TAXON       : Factor w/ 3 levels "C_ACR_4_FA","C_ACR_4_FJ",..: 1 1 2 1 2 1 1 1 2 1 ...
##  $ CPUE        : num  0.968 0.984 0.984 0.952 0.952 ...
##  $ Adj.CPUE    : num  1.05 1.11 1.07 1.03 1.03 ...
##  $ species     : Factor w/ 1 level "C_ACR": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "F": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 3 levels "A","J","U": 1 1 2 1 2 1 1 1 2 1 ...
##  $ common      : Factor w/ 1 level "Blacknose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  1 1 1 1 1 1 1 1 1 1 ...
str(test_strata2[["Male"]][["sharpnose"]])
## 'data.frame':    1270 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 26 27 27.4 26.3 ...
##  $ startlon    : num  -97 -97 -96.8 -96.8 -96.5 ...
##  $ start_gmt   : Factor w/ 422 levels "2001-07-31","2001-08-01",..: 16 18 17 16 19 18 16 15 15 16 ...
##  $ startdepth  : num  27.6 26.9 58.7 43.5 63.1 ...
##  $ tempbotm    : num  27.6 23.6 23.2 26 21.1 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  74.8 57.1 62.5 75.2 65.6 ...
##  $ oxybotm     : num  6 5.8 6 6.2 4.8 4.1 5.8 5.5 6.1 6 ...
##  $ salbotm     : num  36.6 36.5 36.5 36.6 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 3 5 3 3 3 3 3 3 3 3 ...
##  $ Dis.to.SHORE: num  16.6 11.6 44.4 32.2 61.8 ...
##  $ TAXON       : Factor w/ 3 levels "R_TER_4_MA","R_TER_4_MJ",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CPUE        : num  0.968 0.857 10.312 0.984 1 ...
##  $ Adj.CPUE    : num  1.046 0.917 12.008 1.11 1.191 ...
##  $ species     : Factor w/ 1 level "R_TER": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 3 levels "A","J","U": 1 1 1 1 1 1 1 1 1 1 ...
##  $ common      : Factor w/ 1 level "Sharpnose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  1 1 1 1 1 1 1 1 1 1 ...
### Mature Adults from strata 1

# Keep only rows with maturity "A" and drop the now-unused factor levels
# before splitting by sex and species.
test_Adult_strata1 <- load_data(
  data = droplevels(eff3[eff3$maturity == "A", ]),
  strata = 1
)

str(test_Adult_strata1[["Female"]][["blacknose"]])
## 'data.frame':    2863 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 26.7 27.5 26 26 ...
##  $ startlon    : num  -97.2 -97.2 -97 -97 -97 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 21 23 23 22 22 21 23 20 ...
##  $ startdepth  : num  16.3 26.9 27.6 26.5 26.9 ...
##  $ tempbotm    : num  28.8 23.8 27.6 23.6 23.6 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 58.6 74.8 59.7 57.1 ...
##  $ oxybotm     : num  5.6 5.6 6 5.9 5.8 6 6 6.2 6.1 5.4 ...
##  $ salbotm     : num  36.6 36.5 36.6 36.5 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 3 3 5 5 3 3 3 3 3 ...
##  $ Dis.to.SHORE: num  4.16 16.12 16.63 11.14 11.64 ...
##  $ TAXON       : Factor w/ 1 level "C_ACR_4_FA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CPUE        : num  0 0 0.968 0 0 ...
##  $ Adj.CPUE    : num  0 0 1.05 0 0 ...
##  $ species     : Factor w/ 1 level "C_ACR": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "F": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 1 level "A": 1 1 1 1 1 1 1 1 1 1 ...
##  $ common      : Factor w/ 1 level "Blacknose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 1 0 0 0 0 1 0 0 ...
str(test_Adult_strata1[["Male"]][["blacknose"]])
## 'data.frame':    2863 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 26.7 27.5 26 26 ...
##  $ startlon    : num  -97.2 -97.2 -97 -97 -97 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 21 23 23 22 22 21 23 20 ...
##  $ startdepth  : num  16.3 26.9 27.6 26.5 26.9 ...
##  $ tempbotm    : num  28.8 23.8 27.6 23.6 23.6 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 58.6 74.8 59.7 57.1 ...
##  $ oxybotm     : num  5.6 5.6 6 5.9 5.8 6 6 6.2 6.1 5.4 ...
##  $ salbotm     : num  36.6 36.5 36.6 36.5 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 3 3 5 5 3 3 3 3 3 ...
##  $ Dis.to.SHORE: num  4.16 16.12 16.63 11.14 11.64 ...
##  $ TAXON       : Factor w/ 1 level "C_ACR_4_MA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CPUE        : num  0 0 0.968 0 0 ...
##  $ Adj.CPUE    : num  0 0 1.05 0 0 ...
##  $ species     : Factor w/ 1 level "C_ACR": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 1 level "A": 1 1 1 1 1 1 1 1 1 1 ...
##  $ common      : Factor w/ 1 level "Blacknose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 1 0 0 0 0 0 0 0 ...
str(test_Adult_strata1[["Female"]][["sharpnose"]])
## 'data.frame':    2863 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 26.7 27.5 26 26 ...
##  $ startlon    : num  -97.2 -97.2 -97 -97 -97 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 21 23 23 22 22 21 23 20 ...
##  $ startdepth  : num  16.3 26.9 27.6 26.5 26.9 ...
##  $ tempbotm    : num  28.8 23.8 27.6 23.6 23.6 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 58.6 74.8 59.7 57.1 ...
##  $ oxybotm     : num  5.6 5.6 6 5.9 5.8 6 6 6.2 6.1 5.4 ...
##  $ salbotm     : num  36.6 36.5 36.6 36.5 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 3 3 5 5 3 3 3 3 3 ...
##  $ Dis.to.SHORE: num  4.16 16.12 16.63 11.14 11.64 ...
##  $ TAXON       : Factor w/ 1 level "R_TER_4_FA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CPUE        : num  0 0 0 51 30 ...
##  $ Adj.CPUE    : num  0 0 0 55.1 32.1 ...
##  $ species     : Factor w/ 1 level "R_TER": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "F": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 1 level "A": 1 1 1 1 1 1 1 1 1 1 ...
##  $ common      : Factor w/ 1 level "Sharpnose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 0 1 1 1 1 1 0 0 ...
str(test_Adult_strata1[["Male"]][["sharpnose"]])
## 'data.frame':    2863 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 26.7 27.5 26 26 ...
##  $ startlon    : num  -97.2 -97.2 -97 -97 -97 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 21 23 23 22 22 21 23 20 ...
##  $ startdepth  : num  16.3 26.9 27.6 26.5 26.9 ...
##  $ tempbotm    : num  28.8 23.8 27.6 23.6 23.6 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 58.6 74.8 59.7 57.1 ...
##  $ oxybotm     : num  5.6 5.6 6 5.9 5.8 6 6 6.2 6.1 5.4 ...
##  $ salbotm     : num  36.6 36.5 36.6 36.5 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 3 3 5 5 3 3 3 3 3 ...
##  $ Dis.to.SHORE: num  4.16 16.12 16.63 11.14 11.64 ...
##  $ TAXON       : Factor w/ 1 level "R_TER_4_MA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CPUE        : num  0 0 0.968 0 0.857 ...
##  $ Adj.CPUE    : num  0 0 1.046 0 0.917 ...
##  $ species     : Factor w/ 1 level "R_TER": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 1 level "A": 1 1 1 1 1 1 1 1 1 1 ...
##  $ common      : Factor w/ 1 level "Sharpnose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 1 0 1 0 1 1 0 0 ...
### Good

6. Make new CPUE plots and make sure the outliers are gone.

# Shorthand handles for the strata-1 female and male species lists.
fem <- test_strata1[["Female"]]
mal <- test_strata1[["Male"]]

str(fem[["blacknose"]])
## 'data.frame':    8589 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 27.5 27.5 26.7 26.7 ...
##  $ startlon    : num  -97.2 -97.2 -97.2 -97.2 -97.2 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 22 22 22 22 21 21 21 23 ...
##  $ startdepth  : num  16.3 16.3 16.3 26.9 26.9 ...
##  $ tempbotm    : num  28.8 28.8 28.8 23.8 23.8 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 70.5 70.5 58.6 58.6 ...
##  $ oxybotm     : num  5.6 5.6 5.6 5.6 5.6 5.6 6 6 6 5.9 ...
##  $ salbotm     : num  36.6 36.6 36.6 36.5 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 5 5 3 3 3 3 3 3 5 ...
##  $ Dis.to.SHORE: num  4.16 4.16 4.16 16.12 16.12 ...
##  $ TAXON       : Factor w/ 3 levels "C_ACR_4_FA","C_ACR_4_FJ",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ CPUE        : num  0 0 0 0 0 ...
##  $ Adj.CPUE    : num  0 0 0 0 0 ...
##  $ species     : Factor w/ 1 level "C_ACR": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "F": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 3 levels "A","J","U": 1 2 3 1 2 3 1 2 3 1 ...
##  $ common      : Factor w/ 1 level "Blacknose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 0 0 0 0 1 0 0 0 ...
str(mal[["blacknose"]])
## 'data.frame':    8589 obs. of  21 variables:
##  $ year        : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ month       : Factor w/ 5 levels "Aug","Jul","Nov",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ startlat    : num  27.5 27.5 27.5 26.7 26.7 ...
##  $ startlon    : num  -97.2 -97.2 -97.2 -97.2 -97.2 ...
##  $ start_gmt   : Factor w/ 602 levels "2001-07-31","2001-08-01",..: 22 22 22 22 22 22 21 21 21 23 ...
##  $ startdepth  : num  16.3 16.3 16.3 26.9 26.9 ...
##  $ tempbotm    : num  28.8 28.8 28.8 23.8 23.8 ...
##  $ fluorobotm  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ turbbotm    : num  70.5 70.5 70.5 58.6 58.6 ...
##  $ oxybotm     : num  5.6 5.6 5.6 5.6 5.6 5.6 6 6 6 5.9 ...
##  $ salbotm     : num  36.6 36.6 36.6 36.5 36.5 ...
##  $ bottype4    : Factor w/ 5 levels "","Gravel","Mud",..: 5 5 5 3 3 3 3 3 3 5 ...
##  $ Dis.to.SHORE: num  4.16 4.16 4.16 16.12 16.12 ...
##  $ TAXON       : Factor w/ 3 levels "C_ACR_4_MA","C_ACR_4_MJ",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ CPUE        : num  0 0 0 0 0 ...
##  $ Adj.CPUE    : num  0 0 0 0 0 ...
##  $ species     : Factor w/ 1 level "C_ACR": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sex         : Factor w/ 1 level "M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ maturity    : Factor w/ 3 levels "A","J","U": 1 2 3 1 2 3 1 2 3 1 ...
##  $ common      : Factor w/ 1 level "Blacknose": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pres        : int  0 0 0 0 0 0 1 0 0 0 ...